


********************************************************************
*Set working directory
*Put the path of the folder that holds the replication files between the quotes;
*the data files below are opened by file name only.
cd ""

*Load the replication dataset, replacing whatever data are currently in memory
use "Forecast2024_Replication.dta", clear

*Declare the panel: state_initnum is the panel identifier and year is the time
*variable. delta(4) sets one period to 4 years, so the l. and f. operators used
*below refer to the observation 4 years earlier or later.
xtset state_initnum year, delta(4)
********************************************************************

*********************
***Table 1: 1980-2020
*********************
*Regress PerDem2Party on the predictors listed below, using observations with
*year<=2020. The 2024 versions of the approval and economic variables are used.
*The command is written with a semicolon delimiter so it can span several lines.
#delimit ;
regress PerDem2Party
    l.PerDem2Party_dev
    pres_approval_2024 econ_conditions_2024
    democrat_p_state l.democrat_p_state
    democrat_vp_state Anderson Perot2
    if year <= 2020 ;
#delimit cr

***********************
***Before-the-fact/One-step-ahead Forecasts
***********************
*As noted in the text, adjustments to the presidential approval and economic conditions
*variables are made based on previous data. For this reason, each year's forecast
*relies on a different variable. The year in the variable name indicates what election
*year to forecast. So econ_conditions_2024 pres_approval_2024 should be used for
*the 2024 election forecast (these variables incorporate information from 1980-2024).
***********************
*WITHOUT incumbent VP running adjustment
*Used to test whether adjustment improves forecasts
*yhatNOvpadj collects, for each election year from 1984 to 2024, the prediction of a
*model estimated only on years from 1980 up to, but not including, that year.
gen yhatNOvpadj = .
forvalues yr = 1984(4)2024 {
	quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_`yr' econ_conditions_`yr' ///
		south_conf Anderson Perot2 democrat_p_state l.democrat_p_state ///
		democrat_vp_state if year>=1980 & year<`yr'
	*Calculate one-step-ahead forecast
	*The per-year prediction is copied into yhatNOvpadj and then dropped
	predict yhat`yr' if year==`yr'
	replace yhatNOvpadj = yhat`yr' if year==`yr'
	drop yhat`yr'
}
*Flag a state-year as correctly forecast when the observed and predicted values fall
*on the same side of 50. Stata treats a missing value as larger than any number, so
*both variables are required to be non-missing; otherwise a state-year with no
*observed outcome and a forecast above 50 would be counted as correct.
*Values of exactly 50 are never counted as correct.
gen correct_forecastNOvpadj = 0
replace correct_forecastNOvpadj = 1 if !missing(PerDem2Party, yhatNOvpadj) & ///
	((PerDem2Party<50 & yhatNOvpadj<50) | (PerDem2Party>50 & yhatNOvpadj>50))
***********************

***********************
*WITH incumbent VP running adjustment
*yhatall collects the one-step-ahead forecasts for 1984-2024. Each model is estimated
*on unadjusted approval; the adjustment is applied only afterwards, so it affects the
*prediction for the forecast year and not the estimated coefficients.
gen yhatall = .
forvalues yr = 1984(4)2024 {
	*This changes the variable name, which is used below, but does NOT change
	*any values at this point
	gen vpadj_pres_approval_`yr' = pres_approval_`yr'
	quietly regress PerDem2Party l.PerDem2Party_dev vpadj_pres_approval_`yr' econ_conditions_`yr' ///
		south_conf Anderson Perot2 democrat_p_state l.democrat_p_state ///
		democrat_vp_state if year>=1980 & year<`yr'

	*After estimating the model, we shift approval to account for dif b/t
	*incumbent pres and VP who ran in 1988, 2000, and 2024
	*Positive value because Bush's approval was lower than Reagan. This helps Dems.
	*See Online Appendix 6 for specific surveys used to obtain these values.
	replace vpadj_pres_approval_`yr' = vpadj_pres_approval_`yr'+5 if year==1988
	*Negative value, Gore approval was less than Clinton's and this hurts Dems
	replace vpadj_pres_approval_`yr' = vpadj_pres_approval_`yr'-8.5 if year==2000
	*Harris: positive value, higher approval than Biden helps Dems
	replace vpadj_pres_approval_`yr' = vpadj_pres_approval_`yr'+8.2 if year==2024

	*Calculate one-step-ahead forecast
	*The per-year prediction variables are kept here; yhat2024 is listed further below
	predict yhat`yr' if year==`yr'
	replace yhatall = yhat`yr' if year==`yr'
}
***********************

*Identify states correctly forecasted
*A state-year counts as correct when the observed and predicted values fall on the
*same side of 50. Stata treats a missing value as larger than any number, so both
*variables are required to be non-missing; otherwise a state-year with no observed
*outcome and a forecast above 50 would be counted as correct.
*Values of exactly 50 are never counted as correct.
gen correct_forecast = 0
replace correct_forecast = 1 if !missing(PerDem2Party, yhatall) & ///
	((PerDem2Party<50 & yhatall<50) | (PerDem2Party>50 & yhatall>50))

***TEST VP Adjustment
*For 1988 and then 2000, tabulate the correct-forecast flag first from the run
*without the incumbent VP adjustment and then from the run with it.
foreach vp_year in 1988 2000 {
	*Without incumbent VP adjustment
	tabulate year correct_forecastNOvpadj if year==`vp_year'
	*With incumbent VP adjustment
	tabulate year correct_forecast if year==`vp_year'
}
***********************

		
***********************
***Percent of States Correctly forecasted
*All forecast years that fall between 1984 and 2020
tabulate correct_forecast if inrange(year, 1984, 2020)
***Since 2000
*Years after 2000, up to and including 2020
tabulate correct_forecast if year<=2020 & year>2000
***********************


***********************
***Absolute mean error
*absmeanerror holds the absolute gap between forecast and observed value for each
*state-year; summarize then reports its mean.
gen absmeanerror = abs(PerDem2Party-yhatall)
summarize absmeanerror
*Show the state-years from 1984 to 2020 with an error of 10 or more
list year cstate_initial PerDem2Party yhatall absmeanerror if inrange(year, 1984, 2020) & absmeanerror>=10

***Since 2000
summarize absmeanerror if year<=2020 & year>2000
***********************


***********************
*How many states w/in 5% points?
***********************
*List the 2024 forecasts lying strictly within 5 points of 50, and then strictly
*within 2.5 points of 50.
foreach halfwidth in 5 2.5 {
	list cstate_initial yhat2024 if yhat2024 > 50 - `halfwidth' & yhat2024 < 50 + `halfwidth'

}

***************************************
***************************************
***Which 3rd-Party Candidates should be in the Model?
***************************************
***************************************
*In each comparison below, m1 is the model that includes the third-party vote
*variable and m2 is the same model without it; lrtest compares the two.

*Should Anderson (who ran in 80) be included in the 1984 forecast? (Yes)
*Looking back to 1980, we can see if including % vote for Anderson in each state
*would have improved model fit, indicating Anderson's state vote share was
*correlated with two-party vote share. If yes, we should include in 1984, because
*we expect this percentage will help account for some of the two-party vote in
*1984. Since Anderson coded for 1984, after he ran, need to use f.Anderson
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_1980 econ_conditions_1980 ///
	south_conf f.Anderson democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==1980
estimates store m1
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_1980 econ_conditions_1980 ///
	south_conf democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==1980
estimates store m2
lrtest m1 m2

*Perot 1992 (No)
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_1992 econ_conditions_1992 ///
	south_conf f.Perot1 democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==1992
estimates store m1
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_1992 econ_conditions_1992 ///
	south_conf democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==1992
estimates store m2
lrtest m1 m2
*Further evidence we should not include Perot in 1996 model (not significant)
regress PerDem2Party l.PerDem2Party_dev pres_approval_1996 econ_conditions_1996 ///
	south_conf Perot1 democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==1996


*Perot's 2nd presidential bid (1996) is significant in the 2000 model, and improves
*model fit, indicating we should include.
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_2000 econ_conditions_2000 ///
	south_conf Perot2 democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==2000
estimates store m1
quietly regress PerDem2Party l.PerDem2Party_dev pres_approval_2000 econ_conditions_2000 ///
	south_conf democrat_p_state l.democrat_p_state democrat_vp_state ///
	if year==2000
estimates store m2
lrtest m1 m2
***************************************
***************************************
*Clean up the working variables created so far. Both commands drop a range of
*adjacent variables, so they rely on the order in which the variables were created
*above. yhatall was created between the two ranges and is therefore kept.
*_est_m1 and _est_m2 are the sample-marker variables added by estimates store.
drop yhatNOvpadj-correct_forecastNOvpadj
drop vpadj_pres_approval_1984-_est_m2


**********************************
***Descriptive Statistics in Appendix 7, Table A-3
*The data in memory are replaced by a table of summary statistics and brought back
*at the end of the section.
*Must run this entire section together for restore command to work.
preserve

*Build the list of statistics for collapse: for every variable its mean, sd, min
*and max, each saved under the statistic name followed by the variable name.
foreach v of varlist PerDem2Party lag_PerDem2Party_dev pres_approval_2024 econ_conditions_2024 ///
		democrat_p_state democrat_vp_state Anderson Perot2 {
	foreach stat in mean sd min max {
		local statspec `statspec' (`stat') `stat'`v' = `v'
	}
}

*A constant grouping variable gives a single row of statistics
gen c = 1
collapse `statspec', by(c)

*Turn the single wide row into one row per variable; which holds the variable name
reshape long mean sd min max, i(c) j(which) string

format mean sd min max %3.2f

list which mean sd min max

*Must restore data for following code to run
restore
**********************************
**********************************


**********************************
*2024 EC Prediction
**********************************
*Keep only the 2024 rows before adding the electoral vote data
keep if year == 2024

*Merge #EC votes for each state
merge 1:1 State year using "Electoral_Votes_Final.dta"
drop _merge

*NOTE(review): Stata treats a missing value as larger than any number, so a row with
*year==2024 and missing yhatall (for example a row that exists only in the electoral
*vote file) is flagged as a Democratic win by the next line. Confirm whether that is
*intended before changing it; the condition is left as it was.
gen dem_win= 1 if yhatall>50 & year==2024
*dem_win is missing for states not won, so their votes are missing and are ignored
*by the total below
gen dem_state_ECV= Votes*dem_win if year==2024

*total() is the documented egen function; sum() is its older, superseded name
egen dem_national_ECV=total(dem_state_ECV) if year==2024

*Statewide-only total, before the district votes are added
tab dem_national_ECV

* The electoral votes datafile is missing 2 from Maine and 3 from Nebraska due to CD allocation. I add them back in here based on vote share shift and statewide vote share

* Create vars for previous year values statewide and by district
* Each value is the first count as a percentage of the sum of the two counts
* (presumably Democratic and Republican votes - confirm against the source of the counts)
generate PY_Maine = ((435072)/(435072+360737))*100
generate PY_Maine_D1 = ((266376)/(266376+164045))*100
generate PY_Maine_D2 = ((168696)/(168696+196692))*100

generate PY_Nebraska = ((374583)/(374583+556846))*100
generate PY_Nebraska_D1 = ((132261)/(132261+180290))*100
generate PY_Nebraska_D2 = ((176468)/(176468+154377))*100
generate PY_Nebraska_D3 = ((65854)/(65854+222179))*100

* calculate statewide shift and apply to each district
* Extract the value of predicted vote share for maine
summarize yhatall if State == "Maine"

* Store the extracted value in a scalar
scalar maine_pred = r(mean)
* Stop here if no Maine forecast was found. A missing value would otherwise pass
* through to the district values and be read as a win in the comparisons below.
assert !missing(scalar(maine_pred))
display maine_pred
* scalar() makes sure the scalar is used even if a variable has a matching name
gen maine_shift = scalar(maine_pred) - PY_Maine
generate Maine_D1 = maine_shift + PY_Maine_D1
generate Maine_D2 = maine_shift + PY_Maine_D2

* Same for Nebraska
summarize yhatall if State == "Nebraska"
scalar nebraska_pred = r(mean)
assert !missing(scalar(nebraska_pred))

generate Nebraska_shift = scalar(nebraska_pred) - PY_Nebraska
generate Nebraska_D1 = Nebraska_shift + PY_Nebraska_D1
generate Nebraska_D2 = Nebraska_shift + PY_Nebraska_D2
generate Nebraska_D3 = Nebraska_shift + PY_Nebraska_D3

* Add one electoral vote for every district whose shifted value is above 50.
* The strict comparison matches the rule used for the statewide wins above, and
* the missing check keeps a missing district value from counting as a win.
foreach district of varlist Maine_D1 Maine_D2 Nebraska_D1 Nebraska_D2 Nebraska_D3 {
	replace dem_national_ECV = dem_national_ECV + 1 if `district' > 50 & !missing(`district')
}

***************************
*Democrat EC votes
tab dem_national_ECV
*Republican EC votes
*Computed as 538 minus the Democratic total
sum dem_national_ECV
di (538-`r(mean)')
****************************

